/*
Builds sample for data-driven classification of clauses
	// input: pageranks_gender_2009_2016, akm_fe_gender_2009_2016, database_prepanel, contracts_cnes
	//			union_category, union_list_clean
	// output: amenities_gender_sample

*/

* Logging is best-effort: both commands are wrapped in -capture-, so neither
* a missing open log nor a failure to open the new one stops the script.
// Close whatever log may still be open from an earlier run
capture log close
// Open this script's log, overwriting the previous copy
capture log using "$logs/amenities_gender_sample_log", replace


**********************
**QUICK DESCRIPTIVES**
**********************

* Obtain share of workers covered by CNM, CNTSS, CONTRACS, FITRATELP
* Displays two worker-weighted shares: (i) among all workers, and (ii) among
* workers whose union belongs to one of the selected categories.
// Establishment-year panel with info on CBAs signed by establishment's main union
// (forward slashes in paths work on every platform Stata runs on)
use "$files/unions/database_prepanel", clear
rename union_mode union_id
// Obtain union's category to determine "sectors"; unmatched unions are kept
// (c_subgroup stays empty for them), using-only rows are discarded
merge m:1 union_id using "$raw/CNES/union_category", keep(master match) nogenerate
// d2_het = 1 for the union categories of interest, 0 otherwise
gen byte d2_het = inlist(c_subgroup, "metalurgica", "saude", ///
	"atacadista", "varejista", "autonomo", "comunica")
// Obtain union's affiliation to a central
merge m:1 union_id using "$files/union_list_clean", keepusing(union_id CUT) ///
	keep(master match) nogenerate
keep estabid start_year union_id workers d2_het CUT
// One row per establishment: average workforce over its rows, and whether it
// was ever flagged as a relevant category / CUT-affiliated
collapse (mean) workers (max) d2_het CUT, by(estabid) fast
// CNM, CNTSS, CONTRACS, FITRATELP if union is affiliated to CUT and it's of the relevant category
gen cnm = (d2_het==1)&(CUT==1)

// r(sum) is 0 (not missing) when no establishment satisfies the condition,
// so a share is still reported when one of the two groups is empty.
// Share among all workers
quietly summarize workers if cnm==1, meanonly
local covered = r(sum)
quietly summarize workers, meanonly
local total = r(sum)
display as text "Share of all workers covered: " as result `covered'/`total'
// Share among workers in these categories
quietly summarize workers if cnm==1 & d2_het==1, meanonly
local covered = r(sum)
quietly summarize workers if d2_het==1, meanonly
local total = r(sum)
display as text "Share of workers in selected categories covered: " as result `covered'/`total'


****************
**BUILD SAMPLE**
****************

* PageRanks (must have estimates for both genders)
* Keeps only establishments that appear with exactly two distinct gender
* values and stores the result in the tempfile `fes'.
use "$files/pageranks_gender_2009_2016", clear
rename empid_est estabid
// Count distinct gender values per establishment: flag one row per
// (estabid, gender) pair, then add the flags up within establishment.
// Tempvars avoid clashing with any variable already in the file.
tempvar pair_tag n_genders
egen `pair_tag' = tag(estabid gender)
egen `n_genders' = total(`pair_tag'), by(estabid)
gen both_gender = (`n_genders'==2)
drop `pair_tag' `n_genders'
// Log how many establishments have each gender, and how many have both
// (-unique- is not a built-in command; presumably the SSC package)
unique estabid if gender==1
unique estabid if gender==2
unique estabid if both_gender==1
keep if both_gender==1
drop both_gender
tempfile fes
save `fes'

* AKM establishment fixed effects (must have estimates for both genders)
use "$files/akm_fe_gender_2009_2016", clear
rename empid_est estabid
// Count distinct gender values per establishment: flag one row per
// (estabid, gender) pair, then add the flags up within establishment.
// Tempvars avoid clashing with any variable already in the file.
tempvar pair_tag n_genders
egen `pair_tag' = tag(estabid gender)
egen `n_genders' = total(`pair_tag'), by(estabid)
gen both_gender = (`n_genders'==2)
drop `pair_tag' `n_genders'
// Log how many establishments have each gender, and how many have both
// (-unique- is not a built-in command; presumably the SSC package)
unique estabid if gender==1
unique estabid if gender==2
unique estabid if both_gender==1
keep if both_gender==1
drop both_gender

* Prepare PageRanks and wage premiums (2009-2016)
// Keep only establishment-gender pairs present in both the AKM file in
// memory and the PageRank tempfile `fes'
merge 1:1 estabid gender using `fes', keep(match) nogenerate
unique estabid if gender==1
unique estabid if gender==2
//unique establishment observation
// Spread each measure across the establishment's rows: gender==1 values go
// to the *_m variables and gender==2 values to the *_f variables
foreach measure in fe pagerank {
	egen `measure'_m = max(cond(gender==1, `measure', .)), by(estabid)
	egen `measure'_f = max(cond(gender==2, `measure', .)), by(estabid)
}
keep estabid fe_m fe_f pagerank_m pagerank_f
// Rows of the same establishment are now identical; keep one of them
duplicates drop
//save
// Report missing values (-mdesc- is not a built-in command; presumably the SSC package)
mdesc
tempfile ranks
save `ranks'


* Prepare clauses (amenities)
* Builds a contract-level lookup of clause variables, stored in the
* tempfile `amenities'.
use "$files/unions/contracts_cnes", clear
// Keep the contract key, act, and the clause variables, minus the cl_0* ones
keep contract_id act cl_*
drop cl_0*
duplicates drop
// This file is later used as the m:1 side of a merge on contract_id, which
// requires the key to be unique; fail here with a clear message otherwise
isid contract_id, missok
tempfile amenities
save `amenities'


********************
**MERGE INTO PANEL**
********************

* Establishment-year panel with info on CBAs signed by establishment's main union
* Result: one row per establishment with clause variables averaged over
* 2009-2016, matched to the gender-specific PageRanks and AKM effects.
use "$files/unions/database_prepanel", clear
//focus on sectoral CBAs only
keep if act==0
keep estabid start_year contract_id union_mode municipality cnae20subcl
rename start_year year
// 2014 snapshot: rows, distinct contracts, distinct establishments
count if year==2014
unique contract_id if year==2014
unique estabid if year==2014
// Distribution of the number of distinct contracts per state-by-industry cell
preserve
	// NOTE(review): assumes a 6-digit municipality code and a 7-digit
	// cnae20subcl, so that both cuts leave the leading two digits - confirm
	gen state = floor(municipality/1e4)
	gen ind = floor(cnae20subcl/1e5)
	egen xxx = tag(contract_id state ind)
	collapse (sum) xxx, by(state ind)
	sum xxx, d
restore
//merge clauses (amenities); panel rows without a match are kept
merge m:1 contract_id using `amenities', keep(master match) nogenerate
// xtset stops the script if an establishment has repeated years, and it
// must stay ahead of the collapse so the data order it leaves behind is
// the order that (first) sees
xtset estabid year
//restrict to 2009-2016
keep if inrange(year, 2009, 2016)
tab year
//collapse to the establishment level
// cnt = number of rows (years) behind each establishment's averages
collapse (count) cnt=year ///
		 (first) union_mode municipality cnae20subcl ///
		 (mean) cl_* , by(estabid) fast
//merge in PageRanks and AKM wage premiums; keep matched establishments only
merge m:1 estabid using `ranks', keep(match) nogenerate
tab cnt

* Intuitive classification of female clauses
// Clause variables that make up each group
local maternity    cl_23aux_cre cl_23aux_mat cl_43est_abo cl_43est_mae cl_43est_pai cl_43pol_par
local harassment   cl_42ass_sex cl_42igu_opo
local flexibility2 cl_51con_jor cl_51jor_esp cl_51sob cl_51tur_ini cl_32con_tem
local leave        cl_33maoobr_fem cl_72lic_abo cl_72lic_ado cl_72lic_mat cl_72lic_nao cl_72lic_rem cl_73out_dis
// One <group>_clauses variable per group: the row sum of its clause
// variables (rowtotal counts a missing clause as zero)
foreach group in maternity harassment flexibility2 leave {
	egen `group'_clauses = rowtotal(``group'')
}
// Overall index: sum of the four group totals
gen female_clauses = maternity_clauses ///
	+ harassment_clauses ///
	+ flexibility2_clauses ///
	+ leave_clauses

//save
// Write the establishment-level sample, ordered by establishment id
// (forward slashes in paths work on every platform Stata runs on)
sort estabid
save "$files/amenities_gender_sample", replace


// The log is opened with -capture- at the top of the script, so there may
// be no log open at this point; capture keeps that from raising an error
capture log close
